library(data.table)
library(dplyr)
library(DT)
library(ggplot2)
library(gtools)
library(vegan)
library(iNEXT)
library(fossil)
library(ggrepel)
Let’s read the dataset and remove the samples containing less than 1154 reads:
setwd("~/Documents/TFM2/TFM2_Taraspina_miTags_analysis/")
#read data
tb18_tax <- read.table(file="~/Documents/TFM2/TFM2_Taraspina_miTags_analysis/input/otu_table99_SUR_miTags_18S_classif.txt", head=TRUE, fill=TRUE)
#table dimensions and format before setting column names
dim(tb18_tax) # 8642 58
tb18_tax[1:5,1:5]
row.names(tb18_tax)<-tb18_tax[,1]
tb18_tax<-tb18_tax[,-1]
tb18_tax[is.na(tb18_tax)]<-"NA"
dim(tb18_tax) #8642 57
tb18_tax[1:5,1:5]
#table with taxonomic classification alone:
tb18_class <- tb18_tax[,56:57]
dim(tb18_class) #8642 2
tb18_class[1:5,1:2]
#table with occurence data alone
tb18_tax_occur <- tb18_tax[,1:55]
dim(tb18_tax_occur) #8642 55
tb18_tax_occur[1:5,1:5]
amplicons_per_sample_tb18<-colSums(tb18_tax_occur)
summary(amplicons_per_sample_tb18) #median of amplicons per sample = ~3086)
#we'll remove all samples with less than 1154 reads.
amplicons_per_sample_tb18[which(colSums(tb18_tax_occur)<1154)]
#remove samples with less than 1154 reads
tb18_tax_occur_min1154 <- tb18_tax_occur[,colSums(tb18_tax_occur) >= 1154]
dim(tb18_tax_occur_min1154) #8642 45
#remove samples with omitted in 16S dataset (so that we can compare the relative abundance of 16S and 18S OTUs considering the same samples)
tb18_tax_occur_min1154<-subset(tb18_tax_occur_min1154, select=-c(TARA_70_SUR_0d2_3,TARA_67_SUR_0d2_3))
dim(tb18_tax_occur_min1154) #8642 43
#let's check if we loose any OTU when excluding the mentioned stations.
tb18_tax_occur_min1154_no_cero<-tb18_tax_occur_min1154[,-(which(colSums(tb18_tax_occur_min1154)==0))]
dim(tb18_tax_occur_min1154_no_cero)
Table dimensions and content outline:
## [1] 8642 43
## TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_1 0 0 0
## OTU_2 0 0 0
## OTU_3 0 0 0
## OTU_4 0 0 0
## OTU_5 0 0 0
## TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_1 0 0
## OTU_2 0 0
## OTU_3 1 0
## OTU_4 0 0
## OTU_5 0 0
Minimum number of reads per station:
min(colSums(tb18_tax_occur_min1154))
## [1] 1154
Maximum number of reads per station:
max(colSums(tb18_tax_occur_min1154))
## [1] 23781
Identification of stations with higher number of reads:
amplicons_per_sample<-colSums(tb18_tax_occur_min1154)
amplicons_per_sample[which(colSums(tb18_tax_occur_min1154)>20000)]
## TARA_84_SUR_0d2_3
## 23781
Overall reads per sample:
Let’s normalize the original dataset by randomly subsampling 1154 reads in each station:
tb18_tax_occur_min1154_t<-t(tb18_tax_occur_min1154)
tb18_tax_occur_ss1154<-rrarefy(tb18_tax_occur_min1154_t, 1154)
The normalized table shows the following dimensions and format:
## [1] 43 8642
## OTU_1 OTU_2 OTU_3 OTU_4 OTU_5
## TARA_102_SUR_0d2_3 0 0 0 0 0
## TARA_109_SUR_0d2_3 0 0 0 0 0
## TARA_110_SUR_0d2_3 0 0 0 0 0
## TARA_111_SUR_0d2_3 0 0 1 0 0
## TARA_112_SUR_0d2_3 0 0 0 0 0
Its content fits with the expected normalization values (1154 reads per station):
rowSums(tb18_tax_occur_ss1154)
## TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## 1154 1154 1154
## TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3 TARA_122_SUR_0d2_3
## 1154 1154 1154
## TARA_123_SUR_0d2_3 TARA_124_SUR_0d2_3 TARA_125_SUR_0d2_3
## 1154 1154 1154
## TARA_128_SUR_0d2_3 TARA_132_SUR_0d2_3 TARA_133_SUR_0d2_3
## 1154 1154 1154
## TARA_137_SUR_0d2_3 TARA_138_SUR_0d2_3 TARA_140_SUR_0d2_3
## 1154 1154 1154
## TARA_142_SUR_0d2_3 TARA_145_SUR_0d2_3 TARA_146_SUR_0d2_3
## 1154 1154 1154
## TARA_148_SUR_0d2_3 TARA_149_SUR_0d2_3 TARA_150_SUR_0d2_3
## 1154 1154 1154
## TARA_151_SUR_0d2_3 TARA_152_SUR_0d2_3 TARA_56_SUR_0d2_3
## 1154 1154 1154
## TARA_57_SUR_0d2_3 TARA_62_SUR_0d2_3 TARA_64_SUR_0d2_3
## 1154 1154 1154
## TARA_65_SUR_0d2_3 TARA_66_SUR_0d2_3 TARA_68_SUR_0d2_3
## 1154 1154 1154
## TARA_72_SUR_0d2_3 TARA_76_SUR_0d2_3 TARA_78_SUR_0d2_3
## 1154 1154 1154
## TARA_82_SUR_0d2_3 TARA_84_SUR_0d2_3 TARA_85_SUR_0d2_3
## 1154 1154 1154
## TARA_96_SUR_0d2_3 TARA_98_SUR_0d2_3 TARA_99_SUR_0d2_3
## 1154 1154 1154
## MP0311 MP1517 MP1672
## 1154 1154 1154
## MP2821
## 1154
Let’s check out how many OTUs don’t appear in the new table:
length(which(colSums(tb18_tax_occur_ss1154)==0))
## [1] 2942
There are 2942 OTUs that don’t show any occurrence in the normalized data. Let’s remove them from the table and take a look at its final dimensions:
tb18_tax_occur_ss1154_no_cero<-tb18_tax_occur_ss1154[,-(which(colSums(tb18_tax_occur_ss1154)==0))]
tb18_tax_occur_ss1154_no_cero<-tb18_tax_occur_ss1154_no_cero[mixedorder(row.names(tb18_tax_occur_ss1154_no_cero)),]
dim(tb18_tax_occur_ss1154_no_cero)
## [1] 43 5700
Datasets summary:
dim(tb18_tax)
## [1] 8642 57
dim(tb18_tax_occur)
## [1] 8642 55
dim(tb18_tax_occur_ss1154_no_cero)
## [1] 43 5700
Most of the samples take Shannon Index values around 6:
Lowest number of OTUs per sample:
## [1] 348
Maximum number of OTUs per sample:
## [1] 791
In most of the samples, we can identify between 600 and 700 OTUs:
plot(sort(OTUs_per_sample_18S_tax_occur_ss1154), pch=19)
boxplot(OTUs_per_sample_18S_tax_occur_ss1154, pch=19)
pielou_evenness_18S_tax_occur_ss1154<-tb18_tax_occur_ss1154_div/log(OTUs_per_sample_18S_tax_occur_ss1154)
The Pielou index (constrained between 0 and 1) takes values closer to 1 as the variation of species proportion in a sample increases. Most of the samples get values around 0.95, meaning that the numerical composition of different OTUs in a sample is highly variable - there’s no constant presence of dominant species.
The less variation in communities between the species (and the presence of a dominant specie), the lower J’ is.
plot(sort(pielou_evenness_18S_tax_occur_ss1154), pch=19)
boxplot(pielou_evenness_18S_tax_occur_ss1154, pch=19)
The OTU_38, with 890 reads, is the most abundant in the overall dataset:
head(sort(colSums(tb18_tax_occur_ss1154_no_cero), decreasing=T), n=10L)
## OTU_38 OTU_3677 OTU_5415 OTU_751 OTU_3374 OTU_2494 OTU_2631 OTU_3569
## 874 695 571 524 429 361 310 282
## OTU_3177 OTU_1032
## 270 267
Most of the OTUs show very few occurrences; suggesting that we will probably be able to identify a significant ammount of rare otus:
plot(log(sort(colSums(tb18_tax_occur_ss1154_no_cero), decreasing=T)), pch=19)
The OTUs abundance distribution fits relativelly close to log-normal model.
According to Preston’s lognormal model fit into species frequencies groups, we’re missing 1588.816 species:
tb18_tax_occur_ss1154_prestonfit<-prestonfit(colSums(tb18_tax_occur_min1154_t))
plot(tb18_tax_occur_ss1154_prestonfit, main="Pooled species")
veiledspec(tb18_tax_occur_ss1154_prestonfit)
## Extrapolated Observed Veiled
## 9937.816 8349.000 1588.816
When computing Prestons’ lognormal model fit without pooling data into groups, we seem to miss 1385.371 species:
## Extrapolated Observed Veiled
## 9734.371 8349.000 1385.371
rarec_input<-t(as.matrix(colSums(tb18_tax_occur_ss1154_no_cero)))
#tb18_rarecurve_step1000_10000<-rarecurve(rarec_input, step = 1000, 10000, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="18S amplicons diversity step=1000 & ss=10000\n(40,816 OTUs; 5,247,375 reads)\n")
#tb18_rarecurve_step1000_10000
tb18_rarecurve_step100_5700<-rarecurve(rarec_input, step = 100, 5700, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="18S amplicons diversity step=100 & ss=5700\n(5,719 OTUs; 49,622 reads)\n")
#without rarefying
rarec_allOTUs_input<-t(as.matrix(colSums(t(tb18_tax_occur))))
tb18_rarecurve_allOTUs_step100_8642<-rarecurve(rarec_allOTUs_input, step = 100, 8642, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="18S amplicons diversity non-rarefied step=1000 & ss=100000\n(8,642 OTUs; 244,184 reads)\n")
The Bray-Curtis dissimilarity, constrained between 0 (minimum distance) and 1 (highest dissimilarity) allows us to quantify the differences between samples according to the composition and relative abundance of their OTUs. In our dataset, most of the samples pairs take dissimilarity values around 0.8, meaning that their composition is substantially different.
The stations seem to form clusters according to geographic localization, but there are no evident clusters separated from the general groups, apart from the one containing TARA_84, TARA_82 and TARA_85.
(To be done: assign Longhurst provinces information to each station and check if any of the central clusters is meaningful regarding to the samples’ geographical location)
We can identify a prominent group in the central part of the NMDS plot and a few outliers (TARA 82, 84 and 85) in the central-right edge of the plot. The stress parameter takes a value below 0.2, suggesting that the plot is acceptable.
##
## Call:
## monoMDS(dist = tb18_tax_occur_ss1154_no_cero.bray)
##
## Non-metric Multidimensional Scaling
##
## 43 points, dissimilarity 'bray', call 'vegdist(x = tb18_tax_occur_ss1154_no_cero, method = "bray")'
##
## Dimensions: 2
## Stress: 0.1463937
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 70 iterations: Stress nearly unchanged (ratio > sratmax)
When implementing a most robut function for computing NMDS plots, the result is quiet the same:
## Run 0 stress 0.1465317
## Run 1 stress 0.1710814
## Run 2 stress 0.1529505
## Run 3 stress 0.1529514
## Run 4 stress 0.1463796
## ... New best solution
## ... Procrustes: rmse 0.0160749 max resid 0.09410719
## Run 5 stress 0.1450501
## ... New best solution
## ... Procrustes: rmse 0.03346756 max resid 0.2057251
## Run 6 stress 0.1544138
## Run 7 stress 0.1529513
## Run 8 stress 0.1464534
## Run 9 stress 0.1645093
## Run 10 stress 0.1464532
## Run 11 stress 0.1529505
## Run 12 stress 0.145052
## ... Procrustes: rmse 0.0005393131 max resid 0.002027162
## ... Similar to previous best
## Run 13 stress 0.1450487
## ... New best solution
## ... Procrustes: rmse 0.002278156 max resid 0.01146875
## Run 14 stress 0.154718
## Run 15 stress 0.154417
## Run 16 stress 0.1450194
## ... New best solution
## ... Procrustes: rmse 0.003631269 max resid 0.01466868
## Run 17 stress 0.1546418
## Run 18 stress 0.1522817
## Run 19 stress 0.1522815
## Run 20 stress 0.1544133
## *** No convergence -- monoMDS stopping criteria:
## 20: stress ratio > sratmax
## Warning in ordiplot(x, choices = choices, type = type, display = display, :
## Species scores not available
## Warning in if (class(lats) == "SpatialPoints") lats <- coordinates(lats):
## the condition has length > 1 and only the first element will be used
Working datasets:
dim(tb18_tax_occur_ss1154_no_cero)
## [1] 43 5728
tb18_tax_occur_ss1154_no_cero[1:5, 1:5]
## OTU_1 OTU_2 OTU_3 OTU_5 OTU_6
## MP0311 0 0 0 0 0
## MP1517 0 0 0 0 1
## MP1672 0 0 0 0 0
## MP2821 0 3 0 0 0
## TARA_102_SUR_0d2_3 0 0 0 0 0
tb18_tax_occur_ss1154_no_cero.bray<-as.matrix(tb18_tax_occur_ss1154_no_cero.bray)
## [1] 43 43
dim(geo_distances_MP_18S)
## [1] 43 43
Communities quickly change their composition across geographical distances:
plot(geo_distances_MP_18S, tb18_tax_occur_ss1154_no_cero.bray, pch=19, cex=0.4, xlab="Geopgraphical distances", ylab="Bray-Curtis dissimilarities")
Mantel statistic is -significantlly- so low, meaning that the correlation between samples dissimilarity and geographical distances is weak.
mantel(geo_distances_MP_18S, tb18_tax_occur_ss1154_no_cero.bray)
##
## Mantel statistic based on Pearson's product-moment correlation
##
## Call:
## mantel(xdis = geo_distances_MP_18S, ydis = tb18_tax_occur_ss1154_no_cero.bray)
##
## Mantel statistic r: 0.1186
## Significance: 0.018
##
## Upper quantiles of permutations (null model):
## 90% 95% 97.5% 99%
## 0.0646 0.0894 0.1075 0.1334
## Permutation: free
## Number of permutations: 999
Maximum distance between samples:
## [1] 19543.94
Minimum distance between samples:
## [1] 0
Correlograms:
MP_18s_ss1154_mantel_correl_by_1000km<-mantel.correlog(tb18_tax_occur_ss1154_no_cero.bray, D.geo=geo_distances_MP_18S, break.pts=seq(0,20000, by=1000))
plot(MP_18s_ss1154_mantel_correl_by_1000km)
MP_18s_ss1154_mantel_correl_by_100km<-mantel.correlog(tb18_tax_occur_ss1154_no_cero.bray, D.geo=geo_distances_MP_18S, break.pts=seq(0,20000, by=100))
plot(MP_18s_ss1154_mantel_correl_by_100km)
In the following plot, we can appreciate the OTUs distribution according to their percentage of occurence and relative abundance. The red line keeps up OTUs that occur in more than 80% of the samples, the green line limits regionally rare OTUs (< 0.001%), and the blue one restricts regionally abundant OTUs (> 0.1%).
Regionally abundant OTUs (relative abundance over 0.1%):
## mean_rabund perc_occur
## OTU_38 0.017492241 88.372093
## OTU_3677 0.013884970 81.395349
## OTU_751 0.010983032 37.209302
## OTU_5415 0.010761356 25.581395
## OTU_3374 0.008826730 93.023256
## OTU_2494 0.006932409 69.767442
## OTU_2631 0.006106163 74.418605
## OTU_3177 0.005682963 48.837209
## OTU_1032 0.005300069 72.093023
## OTU_3569 0.005159002 53.488372
## OTU_933 0.004796260 90.697674
## OTU_3282 0.004211842 37.209302
## OTU_2910 0.004070775 72.093023
## OTU_5120 0.004070775 39.534884
## OTU_3729 0.003909556 79.069767
## OTU_2200 0.003728185 86.046512
## OTU_1921 0.003687880 76.744186
## OTU_1383 0.003627423 88.372093
## OTU_3454 0.003607271 72.093023
## OTU_3227 0.003466205 34.883721
## OTU_1597 0.003385595 69.767442
## OTU_4176 0.003385595 34.883721
## OTU_4144 0.003244529 44.186047
## OTU_4227 0.003204224 32.558140
## OTU_1456 0.002901939 72.093023
## OTU_1833 0.002720567 72.093023
## OTU_974 0.002680263 67.441860
## OTU_4125 0.002599653 79.069767
## OTU_3038 0.002539196 76.744186
## OTU_4123 0.002519044 34.883721
## OTU_4103 0.002418282 37.209302
## OTU_2031 0.002377978 67.441860
## OTU_4518 0.002377978 44.186047
## OTU_3271 0.002357825 32.558140
## OTU_950 0.002317520 4.651163
## OTU_896 0.002297368 74.418605
## OTU_380 0.002156302 83.720930
## OTU_3871 0.002156302 69.767442
## OTU_2654 0.002115997 65.116279
## OTU_2996 0.002055540 69.767442
## OTU_949 0.002055540 65.116279
## OTU_3926 0.002015235 69.767442
## OTU_893 0.002015235 55.813953
## OTU_3560 0.001974930 62.790698
## OTU_2553 0.001974930 83.720930
## OTU_3056 0.001934626 72.093023
## OTU_973 0.001934626 67.441860
## OTU_1018 0.001874169 13.953488
## OTU_758 0.001874169 65.116279
## OTU_717 0.001833864 76.744186
## OTU_1047 0.001813712 67.441860
## OTU_1746 0.001793559 53.488372
## OTU_4260 0.001793559 41.860465
## OTU_256 0.001773407 76.744186
## OTU_2269 0.001753255 86.046512
## OTU_2195 0.001733102 72.093023
## OTU_2965 0.001733102 67.441860
## OTU_4134 0.001733102 48.837209
## OTU_2648 0.001712950 83.720930
## OTU_1343 0.001712950 69.767442
## OTU_2868 0.001712950 48.837209
## OTU_4226 0.001672645 37.209302
## OTU_2686 0.001632340 72.093023
## OTU_2043 0.001632340 60.465116
## OTU_1641 0.001612188 74.418605
## OTU_2305 0.001612188 74.418605
## OTU_3018 0.001612188 72.093023
## OTU_1496 0.001551731 69.767442
## OTU_2518 0.001551731 53.488372
## OTU_4533 0.001551731 39.534884
## OTU_4090 0.001551731 30.232558
## OTU_216 0.001531579 67.441860
## OTU_1997 0.001471122 74.418605
## OTU_2207 0.001450969 72.093023
## OTU_1635 0.001450969 69.767442
## OTU_3124 0.001450969 58.139535
## OTU_437 0.001450969 76.744186
## OTU_793 0.001430817 60.465116
## OTU_4089 0.001430817 37.209302
## OTU_3602 0.001410665 65.116279
## OTU_884 0.001390512 76.744186
## OTU_2120 0.001390512 58.139535
## OTU_3430 0.001390512 34.883721
## OTU_2853 0.001350208 69.767442
## OTU_2887 0.001350208 69.767442
## OTU_1551 0.001350208 65.116279
## OTU_4203 0.001350208 53.488372
## OTU_1066 0.001330055 58.139535
## OTU_2189 0.001330055 58.139535
## OTU_1929 0.001330055 48.837209
## OTU_1395 0.001330055 25.581395
## OTU_2594 0.001309903 69.767442
## OTU_1087 0.001289751 55.813953
## OTU_1446 0.001289751 69.767442
## OTU_1694 0.001289751 55.813953
## OTU_2730 0.001289751 53.488372
## OTU_983 0.001269598 55.813953
## OTU_2986 0.001249446 60.465116
## OTU_4190 0.001249446 30.232558
## OTU_2124 0.001229293 72.093023
## OTU_1628 0.001229293 58.139535
## OTU_628 0.001229293 58.139535
## OTU_922 0.001229293 58.139535
## OTU_6181 0.001229293 23.255814
## OTU_2078 0.001209141 67.441860
## OTU_938 0.001188989 67.441860
## OTU_990 0.001188989 67.441860
## OTU_1001 0.001188989 60.465116
## OTU_5484 0.001188989 34.883721
## OTU_2158 0.001168836 67.441860
## OTU_3196 0.001168836 58.139535
## OTU_2873 0.001148684 72.093023
## OTU_825 0.001148684 44.186047
## OTU_1261 0.001128532 58.139535
## OTU_2456 0.001128532 58.139535
## OTU_2966 0.001128532 58.139535
## OTU_914 0.001128532 58.139535
## OTU_458 0.001108379 60.465116
## OTU_1825 0.001108379 55.813953
## OTU_2089 0.001108379 46.511628
## OTU_5750 0.001108379 25.581395
## OTU_6608 0.001108379 18.604651
## OTU_2086 0.001068075 65.116279
## OTU_1911 0.001068075 48.837209
## OTU_599 0.001068075 48.837209
## OTU_1901 0.001047922 48.837209
## OTU_2219 0.001027770 67.441860
## OTU_1003 0.001027770 62.790698
## OTU_1975 0.001027770 58.139535
## OTU_295 0.001027770 58.139535
## OTU_2906 0.001027770 48.837209
## OTU_4110 0.001027770 41.860465
## OTU_373 0.001007618 51.162791
## OTU_1276 0.001007618 48.837209
## OTU_2466 0.001007618 46.511628
## OTU_3813 0.001007618 39.534884
## OTU_1012 0.001007618 60.465116
## OTU_363 0.001007618 44.186047
dim(tb18_ss1154_abundant_sorted)
## [1] 138 2
Proportion of regionally abundant OTUs (%):
## [1] 2.409218
Cosmopolitan OTUs (relative abundance over 0.1% and occurence in more than 80% of samples):
## mean_rabund perc_occur
## OTU_3374 0.008826730 93.02326
## OTU_933 0.004796260 90.69767
## OTU_38 0.017492241 88.37209
## OTU_1383 0.003627423 88.37209
## OTU_2200 0.003728185 86.04651
## OTU_2269 0.001753255 86.04651
## OTU_380 0.002156302 83.72093
## OTU_2553 0.001974930 83.72093
## OTU_2648 0.001712950 83.72093
## OTU_3677 0.013884970 81.39535
dim(otu_tb18_ss1154_cosmop_sorted)
## [1] 10 2
Number and proportion (%) of cosmopolitan OTUs:
## [1] 10
## [1] 0.174581
Number and proportion (%) of rare OTUs:
nrow(otu_tb18_ss1154_rabund_percoccur[otu_tb18_ss1154_rabund_percoccur$mean_rabund < 0.00001 & otu_tb18_ss1154_rabund_percoccur$mean_rabund >0,])
## [1] 0
## [1] 0
Let’s add the taxonomic classification by merging “tb18_tax_occur_ss1154_no_cero” with “tb18_tax”:
## [1] 5728 46
## Row.names MP0311 MP1517 MP1672 MP2821
## 1 OTU_1 0 0 0 0
## 2 OTU_10 0 1 3 0
## 3 OTU_100 0 0 0 0
## 4 OTU_1000 2 1 0 0
## 5 OTU_1001 0 2 0 5
## [1] 5728 45
## MP0311 MP1517 MP1672 MP2821 TARA_102_SUR_0d2_3
## OTU_1 0 0 0 0 0
## OTU_10 0 1 3 0 0
## OTU_100 0 0 0 0 0
## OTU_1000 2 1 0 0 0
## OTU_1001 0 2 0 5 0
## [1] 5728 46
## MP0311 MP1517 MP1672 MP2821 TARA_102_SUR_0d2_3
## OTU_1 0 0 0 0 0
## OTU_2 0 0 0 3 0
## OTU_3 0 0 0 0 0
## OTU_5 0 0 0 0 0
## OTU_6 0 1 0 0 0
## [1] 1905 46
## MP0311 MP1517 MP1672 MP2821 TARA_102_SUR_0d2_3
## OTU_14 0 0 0 0 0
## OTU_19 0 0 0 0 0
## OTU_29 0 0 0 0 0
## OTU_72 0 0 0 0 0
## OTU_74 0 0 0 0 0
## MP0311 MP1517 MP1672 MP2821 TARA_102_SUR_0d2_3
## OTU_14 0 0 0 0 0
## OTU_19 0 0 0 0 0
## OTU_29 0 0 0 0 0
## OTU_72 0 0 0 0 0
## OTU_74 0 0 0 0 0
## [1] 1044 43
## [1] 43
## [1] 32
## [1] 41
## [1] 43
## [1] 43
## [1] 27
## [1] 39
## [1] 34
## [1] 35
## [1] 0
## [1] 43
## [1] 39
## [1] 35
## [1] 18
## [1] 2
## [1] 0
## [1] 14
## [1] 2
## [1] 0
## [1] 5
## [1] 0
## [1] 0
## [1] 12
## [1] 25
## [1] 4
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 36
## [1] 16
## [1] 21
## Group.1 x
## 8 Dinophyceae 5453
## 17 Prymnesiophyceae 5180
## 10 Mamiellophyceae 3173
## 13 Pelagophyceae 2206
## 16 Prasinophyceae_clade-VII 1367
## 7 Dictyochophyceae 622
## 5 Chrysophyceae 599
## 6 Cryptophyceae 380
## 1 Bacillariophyceae 287
## 3 Chlorarachniophyceae 136
## 2 Bolidophyceae 132
## 18 Pyramimonadaceae 111
## 23 other_Prasinophyceae 93
## 9 Eustigmatophyceae 88
## 15 Prasinophyceae_clade-IX 53
## 12 Pavlovophyceae 51
## 20 Trebouxiophyceae 39
## 4 Chlorophyceae 25
## 11 Nephroselmidophyceae 19
## 22 Xanthophyceae 5
## 19 Rhodophyceae 4
## 21 Ulvophyceae 4
## 14 Phaeophyceae 2
## Group.1 x
## 8 Dinophyceae 1044
## 17 Prymnesiophyceae 165
## 5 Chrysophyceae 134
## 1 Bacillariophyceae 125
## 10 Mamiellophyceae 102
## 6 Cryptophyceae 57
## 7 Dictyochophyceae 48
## 16 Prasinophyceae_clade-VII 30
## 20 Trebouxiophyceae 30
## 13 Pelagophyceae 23
## 2 Bolidophyceae 21
## 4 Chlorophyceae 20
## 9 Eustigmatophyceae 19
## 23 other_Prasinophyceae 19
## 3 Chlorarachniophyceae 16
## 18 Pyramimonadaceae 14
## 15 Prasinophyceae_clade-IX 13
## 11 Nephroselmidophyceae 7
## 12 Pavlovophyceae 5
## 19 Rhodophyceae 4
## 22 Xanthophyceae 4
## 21 Ulvophyceae 3
## 14 Phaeophyceae 2
## reads_per_class OTUs_per_class
## Bacillariophyceae 287 125
## Bolidophyceae 132 21
## Chlorarachniophyceae 136 16
## Chlorophyceae 25 20
## Chrysophyceae 599 134
## reads_per_class OTUs_per_class samples_per_class
## Dinophyceae 5453 1044 43
## Prymnesiophyceae 5180 165 43
## Mamiellophyceae 3173 102 40
## Pelagophyceae 2206 23 42
## Prasinophyceae_clade-VII 1367 30 39
## Dictyochophyceae 622 48 43
## Chrysophyceae 599 134 42
## Cryptophyceae 380 57 32
## Bacillariophyceae 287 125 38
## Chlorarachniophyceae 136 16 35
## Bolidophyceae 132 21 28
## Pyramimonadaceae 111 14 36
## other_Prasinophyceae 93 19 30
## Eustigmatophyceae 88 19 22
## Prasinophyceae_clade-IX 53 13 16
## Pavlovophyceae 51 5 26
## Trebouxiophyceae 39 30 17
## Chlorophyceae 25 20 13
## Nephroselmidophyceae 19 7 11
## Xanthophyceae 5 4 1
## Rhodophyceae 4 4 3
## Ulvophyceae 4 3 1
## Phaeophyceae 2 2 1
## reads_per_class OTUs_per_class samples_per_class
## 100 100 1400
## reads_per_class OTUs_per_class samples_per_class
## Dinophyceae 27.225522992 54.8031496 100.000000
## Prymnesiophyceae 25.862499376 8.6614173 100.000000
## Mamiellophyceae 15.842029058 5.3543307 93.023256
## Pelagophyceae 11.014029657 1.2073491 97.674419
## Prasinophyceae_clade-VII 6.825103600 1.5748031 90.697674
## Dictyochophyceae 3.105497029 2.5196850 100.000000
## Chrysophyceae 2.990663538 7.0341207 97.674419
## Cryptophyceae 1.897248989 2.9921260 74.418605
## Bacillariophyceae 1.432922263 6.5616798 88.372093
## Chlorarachniophyceae 0.679015428 0.8398950 81.395349
## Bolidophyceae 0.659044386 1.1023622 65.116279
## Pyramimonadaceae 0.554196415 0.7349081 83.720930
## other_Prasinophyceae 0.464326726 0.9973753 69.767442
## Eustigmatophyceae 0.439362924 0.9973753 51.162791
## Prasinophyceae_clade-IX 0.264616306 0.6824147 37.209302
## Pavlovophyceae 0.254630785 0.2624672 60.465116
## Trebouxiophyceae 0.194717659 1.5748031 39.534884
## Chlorophyceae 0.124819012 1.0498688 30.232558
## Nephroselmidophyceae 0.094862449 0.3674541 25.581395
## Xanthophyceae 0.024963802 0.2099738 2.325581
## Rhodophyceae 0.019971042 0.2099738 6.976744
## Ulvophyceae 0.019971042 0.1574803 2.325581
## Phaeophyceae 0.009985521 0.1049869 2.325581
Reads per class vs. OTUs per class [PLOT DESCRIPTION]
## Warning in trans$transform(limits): NaNs produced
## Warning in trans$transform(limits): NaNs produced
## Warning in trans$transform(limits): NaNs produced
## [1] 8642 46
## Row.names TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## 1 OTU_1 0 0 0
## 2 OTU_10 0 3 2
## 3 OTU_100 0 0 0
## 4 OTU_1000 0 0 0
## 5 OTU_1001 0 1 0
## TARA_111_SUR_0d2_3
## 1 0
## 2 9
## 3 0
## 4 0
## 5 2
## [1] 8642 45
## TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_1 0 0 0
## OTU_10 0 3 2
## OTU_100 0 0 0
## OTU_1000 0 0 0
## OTU_1001 0 1 0
## TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_1 0 0
## OTU_10 9 2
## OTU_100 0 3
## OTU_1000 0 2
## OTU_1001 2 15
## [1] 8642 46
## TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_1 0 0 0
## OTU_2 0 0 0
## OTU_3 0 0 0
## OTU_4 0 0 0
## OTU_5 0 0 0
## TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_1 0 0
## OTU_2 0 0
## OTU_3 1 0
## OTU_4 0 0
## OTU_5 0 0
## [1] 2973 46
## TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_14 0 0 0
## OTU_19 0 0 0
## OTU_29 0 1 0
## OTU_34 0 0 0
## OTU_72 0 0 0
## TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_14 0 0
## OTU_19 0 0
## OTU_29 0 0
## OTU_34 0 0
## OTU_72 0 2
## TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_14 0 0 0
## OTU_19 0 0 0
## OTU_29 0 1 0
## OTU_34 0 0 0
## OTU_72 0 0 0
## TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_14 0 0
## OTU_19 0 0
## OTU_29 0 0
## OTU_34 0 0
## OTU_72 0 2
## [1] 1500 43
## [1] 43
## [1] 41
## [1] 43
## [1] 43
## [1] 43
## [1] 39
## [1] 42
## [1] 41
## [1] 43
## [1] 0
## [1] 43
## [1] 41
## [1] 42
## [1] 26
## [1] 3
## [1] 0
## [1] 26
## [1] 4
## [1] 1
## [1] 14
## [1] 0
## [1] 2
## [1] 19
## [1] 37
## [1] 13
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## [1] 42
## [1] 29
## [1] 33
## Group.1 x
## 19 Prymnesiophyceae 34758
## 8 Dinophyceae 23637
## 11 Mamiellophyceae 15846
## 14 Pelagophyceae 9306
## 18 Prasinophyceae_clade-VII 3575
## 5 Chrysophyceae 2824
## 7 Dictyochophyceae 2819
## 1 Bacillariophyceae 2664
## 6 Cryptophyceae 1922
## 2 Bolidophyceae 509
## 20 Pyramimonadaceae 507
## 25 other_Prasinophyceae 492
## 9 Eustigmatophyceae 359
## 3 Chlorarachniophyceae 327
## 13 Pavlovophyceae 228
## 17 Prasinophyceae_clade-IX 155
## 22 Trebouxiophyceae 121
## 4 Chlorophyceae 79
## 12 Nephroselmidophyceae 64
## 24 Xanthophyceae 21
## 21 Rhodophyceae 16
## 23 Ulvophyceae 6
## 15 Phaeophyceae 5
## 10 IncertaeSedis_Archaeplastida 1
## 16 Phaeothamniophyceae 1
## Group.1 x
## 8 Dinophyceae 1500
## 1 Bacillariophyceae 393
## 5 Chrysophyceae 252
## 19 Prymnesiophyceae 179
## 11 Mamiellophyceae 117
## 6 Cryptophyceae 107
## 22 Trebouxiophyceae 60
## 7 Dictyochophyceae 53
## 4 Chlorophyceae 52
## 18 Prasinophyceae_clade-VII 38
## 9 Eustigmatophyceae 27
## 14 Pelagophyceae 26
## 25 other_Prasinophyceae 26
## 2 Bolidophyceae 25
## 3 Chlorarachniophyceae 21
## 20 Pyramimonadaceae 20
## 17 Prasinophyceae_clade-IX 17
## 21 Rhodophyceae 15
## 13 Pavlovophyceae 14
## 24 Xanthophyceae 12
## 12 Nephroselmidophyceae 7
## 23 Ulvophyceae 5
## 15 Phaeophyceae 4
## 10 IncertaeSedis_Archaeplastida 2
## 16 Phaeothamniophyceae 1
## reads_per_class OTUs_per_class
## Bacillariophyceae 2664 393
## Bolidophyceae 509 25
## Chlorarachniophyceae 327 21
## Chlorophyceae 79 52
## Chrysophyceae 2824 252
## reads_per_class OTUs_per_class
## Prymnesiophyceae 34758 179
## Dinophyceae 23637 1500
## Mamiellophyceae 15846 117
## Pelagophyceae 9306 26
## Prasinophyceae_clade-VII 3575 38
## Chrysophyceae 2824 252
## Dictyochophyceae 2819 53
## Bacillariophyceae 2664 393
## Cryptophyceae 1922 107
## Bolidophyceae 509 25
## Pyramimonadaceae 507 20
## other_Prasinophyceae 492 26
## Eustigmatophyceae 359 27
## Chlorarachniophyceae 327 21
## Pavlovophyceae 228 14
## Prasinophyceae_clade-IX 155 17
## Trebouxiophyceae 121 60
## Chlorophyceae 79 52
## Nephroselmidophyceae 64 7
## Xanthophyceae 21 12
## Rhodophyceae 16 15
## Ulvophyceae 6 5
## Phaeophyceae 5 4
## IncertaeSedis_Archaeplastida 1 2
## Phaeothamniophyceae 1 1
## samples_per_class
## Prymnesiophyceae 43
## Dinophyceae 43
## Mamiellophyceae 43
## Pelagophyceae 113
## Prasinophyceae_clade-VII 116
## Chrysophyceae 103
## Dictyochophyceae 112
## Bacillariophyceae 112
## Cryptophyceae 108
## Bolidophyceae 111
## Pyramimonadaceae 97
## other_Prasinophyceae 65
## Eustigmatophyceae 35
## Chlorarachniophyceae 75
## Pavlovophyceae 75
## Prasinophyceae_clade-IX 50
## Trebouxiophyceae 18
## Chlorophyceae 1
## Nephroselmidophyceae 1
## Xanthophyceae 1
## Rhodophyceae 1
## Ulvophyceae 1
## Phaeophyceae 1
## IncertaeSedis_Archaeplastida 1
## Phaeothamniophyceae 1
## reads_per_class OTUs_per_class samples_per_class
## 100.000 100.000 3086.047
## reads_per_class OTUs_per_class
## Prymnesiophyceae 3.467409e+01 6.02085436
## Dinophyceae 2.357994e+01 50.45408678
## Mamiellophyceae 1.580775e+01 3.93541877
## Pelagophyceae 9.283534e+00 0.87453750
## Prasinophyceae_clade-VII 3.566369e+00 1.27817020
## Chrysophyceae 2.817182e+00 8.47628658
## Dictyochophyceae 2.812194e+00 1.78271107
## Bacillariophyceae 2.657569e+00 13.21897074
## Cryptophyceae 1.917360e+00 3.59905819
## Bolidophyceae 5.077712e-01 0.84090145
## Pyramimonadaceae 5.057760e-01 0.67272116
## other_Prasinophyceae 4.908122e-01 0.87453750
## Eustigmatophyceae 3.581333e-01 0.90817356
## Chlorarachniophyceae 3.262106e-01 0.70635721
## Pavlovophyceae 2.274496e-01 0.47090481
## Prasinophyceae_clade-IX 1.546258e-01 0.57181298
## Trebouxiophyceae 1.207079e-01 2.01816347
## Chlorophyceae 7.880928e-02 1.74907501
## Nephroselmidophyceae 6.384549e-02 0.23545240
## Xanthophyceae 2.094930e-02 0.40363269
## Rhodophyceae 1.596137e-02 0.50454087
## Ulvophyceae 5.985515e-03 0.16818029
## Phaeophyceae 4.987929e-03 0.13454423
## IncertaeSedis_Archaeplastida 9.975858e-04 0.06727212
## Phaeothamniophyceae 9.975858e-04 0.03363606
## samples_per_class
## Prymnesiophyceae 100.000000
## Dinophyceae 100.000000
## Mamiellophyceae 100.000000
## Pelagophyceae 262.790698
## Prasinophyceae_clade-VII 269.767442
## Chrysophyceae 239.534884
## Dictyochophyceae 260.465116
## Bacillariophyceae 260.465116
## Cryptophyceae 251.162791
## Bolidophyceae 258.139535
## Pyramimonadaceae 225.581395
## other_Prasinophyceae 151.162791
## Eustigmatophyceae 81.395349
## Chlorarachniophyceae 174.418605
## Pavlovophyceae 174.418605
## Prasinophyceae_clade-IX 116.279070
## Trebouxiophyceae 41.860465
## Chlorophyceae 2.325581
## Nephroselmidophyceae 2.325581
## Xanthophyceae 2.325581
## Rhodophyceae 2.325581
## Ulvophyceae 2.325581
## Phaeophyceae 2.325581
## IncertaeSedis_Archaeplastida 2.325581
## Phaeothamniophyceae 2.325581
OTUs per class vs. samples in which they occur [PLOT DESCRIPTION]
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_text).
Reads per class vs. OTUs per class [PLOT DESCRIPTION]
## Warning in trans$transform(limits): NaNs produced
Let’s read the dataset and remove the samples containing less than 36155 reads:
## [1] 28294 59
## OTU_no TARA_100_SUR_0d2_3 TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3
## 1 OTU_1 0 0 1
## 2 OTU_2 1 0 0
## 3 OTU_3 0 0 0
## 4 OTU_4 0 2 3
## 5 OTU_5 0 0 0
## TARA_110_SUR_0d2_3
## 1 0
## 2 0
## 3 0
## 4 1
## 5 0
## [1] 28294 58
## TARA_100_SUR_0d2_3 TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3
## OTU_1 0 0 1
## OTU_2 1 0 0
## OTU_3 0 0 0
## OTU_4 0 2 3
## OTU_5 0 0 0
## TARA_110_SUR_0d2_3 TARA_111_SUR_0d2_3
## OTU_1 0 0
## OTU_2 0 0
## OTU_3 0 0
## OTU_4 1 5
## OTU_5 0 0
## [1] 28294 3
## OTUId
## OTU_1 SILVA.v123_JN018772.1.1342_Bacteria;Proteobacteria;Alphaproteobacteria;SAR11_clade;Surface_1;uncultured_bacterium
## OTU_2 SILVA.v123_GU474892.2198.3716_Bacteria;Marinimicrobia__SAR406_clade_;uncultured_Marinimicrobia_bacterium_HF4000_22B16
## OTU_3 SILVA.v123_KC899221.1.1360_Bacteria;Proteobacteria;Gammaproteobacteria;Alteromonadales;Pseudoalteromonadaceae;Pseudoalteromonas;uncultured_Pseudoalteromonas_sp.
## OTU_4 SILVA.v123_HQ242598.1.1472_Bacteria;Proteobacteria;Gammaproteobacteria;Thiotrichales;Thiotrichales_Incertae_Sedis;Candidatus_Endoecteinascidia;uncultured_gamma_proteobacterium
## OTU_5 SILVA.v123_GQ337196.1.1462_Bacteria;Chloroflexi;SAR202_clade;uncultured_Chloroflexi_bacterium
## class_A class_B
## OTU_1 other_bacteria other_bacteria
## OTU_2 other_bacteria other_bacteria
## OTU_3 other_bacteria other_bacteria
## OTU_4 other_bacteria other_bacteria
## OTU_5 other_bacteria other_bacteria
## [1] 28294 55
## TARA_100_SUR_0d2_3 TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3
## OTU_1 0 0 1
## OTU_2 1 0 0
## OTU_3 0 0 0
## OTU_4 0 2 3
## OTU_5 0 0 0
## TARA_110_SUR_0d2_3 TARA_111_SUR_0d2_3
## OTU_1 0 0
## OTU_2 0 0
## OTU_3 0 0
## OTU_4 1 5
## OTU_5 0 0
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 13690 50950 83020 77920 100900 158900
## MP1857 MP2243 MP1176 MP1421
## 13690 17749 21609 26962
## TARA_70_SUR_0d2_3 TARA_67_SUR_0d2_3
## 28965 33399
## [1] 28294 49
## [1] 28294 43
## [1] 28294 0
Table dimensions and content outline:
## [1] 28294 43
## TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## OTU_1 0 1 0
## OTU_2 0 0 0
## OTU_3 0 0 0
## OTU_4 2 3 1
## OTU_5 0 0 0
## TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3
## OTU_1 0 1
## OTU_2 0 0
## OTU_3 0 0
## OTU_4 5 2
## OTU_5 0 0
Minimum number of reads per station:
min(colSums(tb16_tax_occur_min36155))
## [1] 36155
Maximum number of reads per station:
max(colSums(tb16_tax_occur_min36155))
## [1] 158933
Identification of stations with higher number of reads:
amplicons_per_sample<-colSums(tb16_tax_occur_min36155)
amplicons_per_sample[which(colSums(tb16_tax_occur_min36155)>150000)]
## TARA_64_SUR_0d2_3 TARA_85_SUR_0d2_3
## 158933 150654
Overall reads per sample:
Let’s normalize the original dataset by randomly subsampling 36155 reads in each station:
tb16_tax_occur_min36155_t<-t(tb16_tax_occur_min36155)
tb16_tax_occur_ss36155<-rrarefy(tb16_tax_occur_min36155_t, 36155)
The normalized table shows the following dimensions and format:
## [1] 43 28294
## OTU_1 OTU_2 OTU_3 OTU_4 OTU_5
## TARA_102_SUR_0d2_3 0 0 0 1 0
## TARA_109_SUR_0d2_3 1 0 0 0 0
## TARA_110_SUR_0d2_3 0 0 0 0 0
## TARA_111_SUR_0d2_3 0 0 0 1 0
## TARA_112_SUR_0d2_3 0 0 0 0 0
Its content fits with the expected normalization values (36155 reads per station):
rowSums(tb16_tax_occur_ss36155)
## TARA_102_SUR_0d2_3 TARA_109_SUR_0d2_3 TARA_110_SUR_0d2_3
## 36155 36155 36155
## TARA_111_SUR_0d2_3 TARA_112_SUR_0d2_3 TARA_122_SUR_0d2_3
## 36155 36155 36155
## TARA_123_SUR_0d2_3 TARA_124_SUR_0d2_3 TARA_125_SUR_0d2_3
## 36155 36155 36155
## TARA_128_SUR_0d2_3 TARA_132_SUR_0d2_3 TARA_133_SUR_0d2_3
## 36155 36155 36155
## TARA_137_SUR_0d2_3 TARA_138_SUR_0d2_3 TARA_140_SUR_0d2_3
## 36155 36155 36155
## TARA_142_SUR_0d2_3 TARA_145_SUR_0d2_3 TARA_146_SUR_0d2_3
## 36155 36155 36155
## TARA_148_SUR_0d2_3 TARA_149_SUR_0d2_3 TARA_150_SUR_0d2_3
## 36155 36155 36155
## TARA_151_SUR_0d2_3 TARA_152_SUR_0d2_3 TARA_56_SUR_0d2_3
## 36155 36155 36155
## TARA_57_SUR_0d2_3 TARA_62_SUR_0d2_3 TARA_64_SUR_0d2_3
## 36155 36155 36155
## TARA_65_SUR_0d2_3 TARA_66_SUR_0d2_3 TARA_68_SUR_0d2_3
## 36155 36155 36155
## TARA_72_SUR_0d2_3 TARA_76_SUR_0d2_3 TARA_78_SUR_0d2_3
## 36155 36155 36155
## TARA_82_SUR_0d2_3 TARA_84_SUR_0d2_3 TARA_85_SUR_0d2_3
## 36155 36155 36155
## TARA_96_SUR_0d2_3 TARA_98_SUR_0d2_3 TARA_99_SUR_0d2_3
## 36155 36155 36155
## MP0311 MP1517 MP1672
## 36155 36155 36155
## MP2821
## 36155
Let’s check out how many OTUs don’t appear in the new table:
length(which(colSums(tb16_tax_occur_ss36155)==0)) #46
## [1] 8531
There are 8045 OTUs that don’t show any occurrence in the normalized data. Let’s remove them from the table and take a look at its final dimensions:
tb16_tax_occur_ss36155_no_cero<-tb16_tax_occur_ss36155[,-(which(colSums(tb16_tax_occur_ss36155)==0))]
tb16_tax_occur_ss36155_no_cero<-tb16_tax_occur_ss36155_no_cero[mixedorder(row.names(tb16_tax_occur_ss36155_no_cero)),]
dim(tb16_tax_occur_ss36155_no_cero) #92 913
## [1] 43 19763
Datasets summary:
dim(tb16_tax) #959 128
## [1] 28294 58
dim(tb16_tax_occur) #959 122
## [1] 28294 55
dim(tb16_tax_occur_ss36155_no_cero) #92 913
## [1] 43 19763
Most of the samples take Shannon Index values around 6:
Lowest number of OTUs per sample:
## [1] 2511
Maximum number of OTUs per sample:
## [1] 5077
In most of the samples, we can identify between 600 and 700 OTUs:
plot(sort(OTUs_per_sample_16S_tax_occur_ss36155), pch=19)
boxplot(OTUs_per_sample_16S_tax_occur_ss36155, pch=19)
pielou_evenness_16S_tax_occur_ss36155<-tb16_tax_occur_ss36155_div/log(OTUs_per_sample_16S_tax_occur_ss36155)
The Pielou index (constrained between 0 and 1) takes values closer to 1 as the variation of species proportion in a sample increases. Most of the samples get values around 0.95, meaning that the numerical composition of different OTUs in a sample is highly variable - there’s no constant presence of dominant species.
The less variation in communities between the species (and the presence of a dominant specie), the lower J’ is.
plot(sort(pielou_evenness_16S_tax_occur_ss36155), pch=19)
boxplot(pielou_evenness_16S_tax_occur_ss36155, pch=19)
The OTU_38, with 890 reads, is the most abundant in the overall dataset:
head(sort(colSums(tb16_tax_occur_ss36155_no_cero), decreasing=T), n=10L)
## OTU_1754 OTU_2893 OTU_761 OTU_2428 OTU_3736 OTU_1942 OTU_2466 OTU_3271
## 17333 13350 13260 11886 11165 10033 9996 8685
## OTU_5398 OTU_5330
## 8431 7718
Most of the OTUs show very few occurrences; suggesting that we will probably be able to identify a significant ammount of rare otus:
plot(log(sort(colSums(tb16_tax_occur_ss36155_no_cero), decreasing=T)), pch=19)
The OTUs abundance distribution fits relativelly close to log-normal model.
According to Preston’s lognormal model fit into species frequencies groups, we’re missing 1588.816 species:
tb16_tax_occur_ss36155_prestonfit<-prestonfit(colSums(tb16_tax_occur_min36155_t))
plot(tb16_tax_occur_ss36155_prestonfit, main="Pooled species")
veiledspec(tb16_tax_occur_ss36155_prestonfit)
## Extrapolated Observed Veiled
## 108452.1 24691.0 83761.1
When computing Prestons’ lognormal model fit without pooling data into groups, we seem to miss 1385.371 species:
## Extrapolated Observed Veiled
## 75267.64 24691.00 50576.64
rarec_input<-t(as.matrix(colSums(tb16_tax_occur_ss36155_no_cero)))
#tb16_rarecurve_step1000_10000<-rarecurve(rarec_input, step = 1000, 10000, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="16S amplicons diversity step=1000 & ss=10000\n(40,816 OTUs; 5,247,375 reads)\n")
#tb16_rarecurve_step1000_10000
tb16_rarecurve_step100_5700<-rarecurve(rarec_input, step = 1000, 19700, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="16S amplicons diversity step=100 & ss=5700\n(19,756 OTUs; 1,554,665 reads)\n")
#without rarefying
rarec_allOTUs_input<-t(as.matrix(colSums(t(tb16_tax_occur))))
tb16_rarecurve_allOTUs_step100_8642<-rarecurve(rarec_allOTUs_input, step = 1000, 28200, xlab = "Sample size", ylab = "OTUs", label = TRUE, main="16S amplicons diversity non-rarefied step=1000 & ss=100000\n(28,294 OTUs; 4,285,523 reads)\n")
The Bray-Curtis dissimilarity, constrained between 0 (minimum distance) and 1 (highest dissimilarity) allows us to quantify the differences between samples according to the composition and relative abundance of their OTUs. In our dataset, most of the samples pairs take dissimilarity values around 0.8, meaning that their composition is substantially different.
The stations seem to form clusters according to geographic localization, but there are no evident clusters separated from the general groups, apart from the one containing TARA_84, TARA_82 and TARA_85.
(To be done: assign Longhurst provinces information to each station and check if any of the central clusters is meaningful regarding to the samples’ geographical location)
We can identify a prominent group in the central part of the NMDS plot and a few outliers (TARA 82, 84 and 85) in the central-right edge of the plot. The stress parameter takes a value below 0.2, suggesting that the plot is acceptable.
##
## Call:
## monoMDS(dist = tb16_tax_occur_ss36155_no_cero.bray)
##
## Non-metric Multidimensional Scaling
##
## 43 points, dissimilarity 'bray', call 'vegdist(x = tb16_tax_occur_ss36155_no_cero, method = "bray")'
##
## Dimensions: 2
## Stress: 7.66663e-05
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 116 iterations: Stress nearly zero (< smin)
When implementing a most robut function for computing NMDS plots, the result is quiet the same:
## Run 0 stress 9.985152e-05
## Run 1 stress 9.987095e-05
## ... Procrustes: rmse 6.486584e-05 max resid 0.0002957008
## ... Similar to previous best
## Run 2 stress 9.64942e-05
## ... New best solution
## ... Procrustes: rmse 1.626488e-05 max resid 7.824625e-05
## ... Similar to previous best
## Run 3 stress 9.628563e-05
## ... New best solution
## ... Procrustes: rmse 3.677841e-06 max resid 1.505732e-05
## ... Similar to previous best
## Run 4 stress 9.206385e-05
## ... New best solution
## ... Procrustes: rmse 1.267344e-05 max resid 3.270834e-05
## ... Similar to previous best
## Run 5 stress 9.344504e-05
## ... Procrustes: rmse 6.186297e-05 max resid 0.0001878703
## ... Similar to previous best
## Run 6 stress 9.751818e-05
## ... Procrustes: rmse 7.032632e-05 max resid 0.000206771
## ... Similar to previous best
## Run 7 stress 9.813187e-05
## ... Procrustes: rmse 5.572368e-05 max resid 0.0002609441
## ... Similar to previous best
## Run 8 stress 9.341067e-05
## ... Procrustes: rmse 3.81964e-05 max resid 0.0001407819
## ... Similar to previous best
## Run 9 stress 7.181514e-05
## ... New best solution
## ... Procrustes: rmse 2.218769e-05 max resid 5.003509e-05
## ... Similar to previous best
## Run 10 stress 9.109377e-05
## ... Procrustes: rmse 5.475929e-05 max resid 0.0002349282
## ... Similar to previous best
## Run 11 stress 9.781534e-05
## ... Procrustes: rmse 5.708114e-05 max resid 0.0002415838
## ... Similar to previous best
## Run 12 stress 9.916223e-05
## ... Procrustes: rmse 3.201431e-05 max resid 6.966896e-05
## ... Similar to previous best
## Run 13 stress 9.722294e-05
## ... Procrustes: rmse 2.915755e-05 max resid 6.467888e-05
## ... Similar to previous best
## Run 14 stress 9.514618e-05
## ... Procrustes: rmse 5.950623e-05 max resid 0.0002176754
## ... Similar to previous best
## Run 15 stress 9.58663e-05
## ... Procrustes: rmse 4.567358e-05 max resid 0.0001514342
## ... Similar to previous best
## Run 16 stress 8.324376e-05
## ... Procrustes: rmse 6.040324e-05 max resid 0.0002344572
## ... Similar to previous best
## Run 17 stress 9.675721e-05
## ... Procrustes: rmse 5.497252e-05 max resid 0.0002349048
## ... Similar to previous best
## Run 18 stress 9.328282e-05
## ... Procrustes: rmse 2.492408e-05 max resid 6.550225e-05
## ... Similar to previous best
## Run 19 stress 9.692258e-05
## ... Procrustes: rmse 5.376792e-05 max resid 0.0002431635
## ... Similar to previous best
## Run 20 stress 8.975387e-05
## ... Procrustes: rmse 4.531961e-05 max resid 0.0001704904
## ... Similar to previous best
## *** Solution reached
## Warning in metaMDS(tb16_tax_occur_ss36155_no_cero.bray): Stress is (nearly)
## zero - you may have insufficient data
## Warning in ordiplot(x, choices = choices, type = type, display = display, :
## Species scores not available
## Warning in if (class(lats) == "SpatialPoints") lats <- coordinates(lats):
## the condition has length > 1 and only the first element will be used
Working datasets:
dim(tb16_tax_occur_ss36155_no_cero)
## [1] 43 19763
tb16_tax_occur_ss36155_no_cero[1:5, 1:5]
## OTU_1 OTU_3 OTU_4 OTU_5 OTU_7
## MP0311 0 0 0 0 0
## MP1517 0 0 3 0 1
## MP1672 0 0 0 0 0
## MP2821 1 0 0 0 1
## TARA_102_SUR_0d2_3 0 0 1 0 0
tb16_tax_occur_ss36155_no_cero.bray<-as.matrix(tb16_tax_occur_ss36155_no_cero.bray)
## [1] 43 43
dim(geo_distances_MP_16S)
## [1] 43 43
Communities quickly change their composition across geographical distances:
plot(geo_distances_MP_16S, tb16_tax_occur_ss36155_no_cero.bray, pch=19, cex=0.4, xlab="Geopgraphical distances", ylab="Bray-Curtis dissimilarities")
Mantel statistic is -significantlly- so low, meaning that the correlation between samples dissimilarity and geographical distances is weak.
mantel(geo_distances_MP_16S, tb16_tax_occur_ss36155_no_cero.bray)
##
## Mantel statistic based on Pearson's product-moment correlation
##
## Call:
## mantel(xdis = geo_distances_MP_16S, ydis = tb16_tax_occur_ss36155_no_cero.bray)
##
## Mantel statistic r: 0.02826
## Significance: 0.279
##
## Upper quantiles of permutations (null model):
## 90% 95% 97.5% 99%
## 0.0873 0.1167 0.1444 0.1697
## Permutation: free
## Number of permutations: 999
Maximum distance between samples:
## [1] 19543.94
Minimum distance between samples:
## [1] 0
Correlograms:
MP_16s_ss36155_mantel_correl_by_1000km<-mantel.correlog(tb16_tax_occur_ss36155_no_cero.bray, D.geo=geo_distances_MP_16S, break.pts=seq(0,20000, by=1000))
plot(MP_16s_ss36155_mantel_correl_by_1000km)
MP_16s_ss36155_mantel_correl_by_100km<-mantel.correlog(tb16_tax_occur_ss36155_no_cero.bray, D.geo=geo_distances_MP_16S, break.pts=seq(0,20000, by=100))
plot(MP_16s_ss36155_mantel_correl_by_100km)
In the following plot, we can appreciate the OTUs distribution according to their percentage of occurence and relative abundance. The red line keeps up OTUs that occur in more than 80% of the samples, the green line limits regionally rare OTUs (< 0.001%), and the blue one restricts regionally abundant OTUs (> 0.1%).
Regionally abundant OTUs (relative abundance over 0.1%):
## mean_rabund perc_occur
## OTU_1754 0.011149026 93.02326
## OTU_2893 0.008587059 93.02326
## OTU_761 0.008529169 93.02326
## OTU_2428 0.007645377 93.02326
## OTU_3736 0.007181611 93.02326
## OTU_1942 0.006453480 93.02326
## OTU_2466 0.006429681 93.02326
## OTU_3271 0.005586413 90.69767
## OTU_5398 0.005423033 90.69767
## OTU_5330 0.004964414 93.02326
## OTU_27927 0.003777663 48.83721
## OTU_5352 0.003766728 58.13953
## OTU_3187 0.003630364 100.00000
## OTU_27901 0.003573117 81.39535
## OTU_2866 0.003430321 90.69767
## OTU_6083 0.003165955 86.04651
## OTU_8011 0.003102276 83.72093
## OTU_709 0.003061110 93.02326
## OTU_635 0.003025732 93.02326
## OTU_3172 0.002888082 88.37209
## OTU_3311 0.002843056 90.69767
## OTU_5727 0.002715698 93.02326
## OTU_3259 0.002529805 58.13953
## OTU_5447 0.002478991 100.00000
## OTU_29 0.002464840 100.00000
## OTU_5403 0.002407593 90.69767
## OTU_27900 0.002379934 100.00000
## OTU_4048 0.002347129 100.00000
## OTU_3162 0.002286023 95.34884
## OTU_1329 0.002241640 93.02326
## OTU_5633 0.002238424 95.34884
## OTU_5432 0.002228776 93.02326
## OTU_28061 0.002150302 51.16279
## OTU_5441 0.002136152 93.02326
## OTU_6164 0.002118784 93.02326
## OTU_3308 0.002082121 97.67442
## OTU_3004 0.002071829 93.02326
## OTU_2877 0.002066683 95.34884
## OTU_1676 0.002044814 88.37209
## OTU_8163 0.002039668 100.00000
## OTU_5635 0.002038381 100.00000
## OTU_5978 0.001993355 100.00000
## OTU_5961 0.001958621 100.00000
## OTU_3023 0.001958621 93.02326
## OTU_8153 0.001957335 93.02326
## OTU_1738 0.001929676 93.02326
## OTU_5772 0.001898801 100.00000
## OTU_6067 0.001894942 100.00000
## OTU_5752 0.001840911 90.69767
## OTU_5653 0.001828047 93.02326
## OTU_27912 0.001820971 46.51163
## OTU_3197 0.001815825 81.39535
## OTU_5301 0.001794599 95.34884
## OTU_3240 0.001783021 93.02326
## OTU_3810 0.001757292 93.02326
## OTU_4926 0.001751503 93.02326
## OTU_5879 0.001743784 93.02326
## OTU_4664 0.001741211 100.00000
## OTU_4366 0.001663381 100.00000
## OTU_4560 0.001658878 100.00000
## OTU_3680 0.001653732 93.02326
## OTU_9255 0.001645371 86.04651
## OTU_27934 0.001615139 95.34884
## OTU_5327 0.001590696 93.02326
## OTU_333 0.001588123 100.00000
## OTU_4991 0.001551460 95.34884
## OTU_6155 0.001512866 100.00000
## OTU_542 0.001498072 93.02326
## OTU_3731 0.001496142 100.00000
## OTU_5691 0.001470413 93.02326
## OTU_5607 0.001467197 93.02326
## OTU_23 0.001443398 93.02326
## OTU_7987 0.001436966 100.00000
## OTU_3198 0.001436966 93.02326
## OTU_5804 0.001425387 93.02326
## OTU_8367 0.001419598 100.00000
## OTU_5662 0.001409950 90.69767
## OTU_5968 0.001397086 100.00000
## OTU_3803 0.001385507 100.00000
## OTU_5741 0.001373929 100.00000
## OTU_5500 0.001371357 100.00000
## OTU_5651 0.001368140 93.02326
## OTU_5985 0.001362351 100.00000
## OTU_5667 0.001355276 93.02326
## OTU_27938 0.001353989 93.02326
## OTU_7407 0.001341125 90.69767
## OTU_5665 0.001334693 93.02326
## OTU_5925 0.001328260 86.04651
## OTU_8458 0.001327617 100.00000
## OTU_5742 0.001326974 93.02326
## OTU_4036 0.001322471 97.67442
## OTU_5564 0.001316682 93.02326
## OTU_3326 0.001315396 90.69767
## OTU_5911 0.001307034 100.00000
## OTU_6102 0.001303175 93.02326
## OTU_3347 0.001297386 97.67442
## OTU_5511 0.001293526 93.02326
## OTU_5624 0.001290953 95.34884
## OTU_5445 0.001281948 100.00000
## OTU_5942 0.001259435 100.00000
## OTU_5449 0.001256219 93.02326
## OTU_5823 0.001253003 93.02326
## OTU_28017 0.001251717 79.06977
## OTU_5491 0.001247857 100.00000
## OTU_5519 0.001240782 48.83721
## OTU_28046 0.001222771 55.81395
## OTU_5523 0.001213123 100.00000
## OTU_5288 0.001213123 90.69767
## OTU_5586 0.001207334 95.34884
## OTU_3214 0.001204761 93.02326
## OTU_5414 0.001204118 100.00000
## OTU_5268 0.001193826 100.00000
## OTU_5786 0.001188681 83.72093
## OTU_5266 0.001177102 93.02326
## OTU_6193 0.001176459 100.00000
## OTU_5493 0.001173886 95.34884
## OTU_5880 0.001172600 90.69767
## OTU_8377 0.001168741 100.00000
## OTU_7455 0.001168097 100.00000
## OTU_5976 0.001164881 93.02326
## OTU_8146 0.001164238 100.00000
## OTU_5355 0.001160379 100.00000
## OTU_10747 0.001144941 55.81395
## OTU_934 0.001139795 100.00000
## OTU_12129 0.001116639 65.11628
## OTU_9267 0.001112780 83.72093
## OTU_5982 0.001111493 93.02326
## OTU_6047 0.001099915 100.00000
## OTU_5821 0.001091553 93.02326
## OTU_5222 0.001090910 93.02326
## OTU_5211 0.001081262 95.34884
## OTU_8436 0.001075473 100.00000
## OTU_4083 0.001072257 95.34884
## OTU_5428 0.001070970 100.00000
## OTU_3789 0.001064538 100.00000
## OTU_520 0.001058749 93.02326
## OTU_5499 0.001057463 93.02326
## OTU_5817 0.001042025 100.00000
## OTU_3207 0.001030447 90.69767
## OTU_223 0.001027231 93.02326
## OTU_5815 0.001025301 100.00000
## OTU_759 0.001018226 86.04651
## OTU_2789 0.001014366 100.00000
## OTU_3173 0.001013080 83.72093
## OTU_5988 0.001011150 93.02326
## OTU_5800 0.001009864 93.02326
## OTU_2980 0.001006648 100.00000
## OTU_790 0.001005361 100.00000
## OTU_28060 0.001001502 41.86047
dim(tb16_ss36155_abundant_sorted)
## [1] 149 2
Proportion of regionally abundant OTUs (%):
## [1] 0.7539341
Cosmopolitan OTUs (relative abundance over 0.1% and occurence in more than 80% of samples):
## mean_rabund perc_occur
## OTU_3187 0.003630364 100.00000
## OTU_5447 0.002478991 100.00000
## OTU_29 0.002464840 100.00000
## OTU_27900 0.002379934 100.00000
## OTU_4048 0.002347129 100.00000
## OTU_8163 0.002039668 100.00000
## OTU_5635 0.002038381 100.00000
## OTU_5978 0.001993355 100.00000
## OTU_5961 0.001958621 100.00000
## OTU_5772 0.001898801 100.00000
## OTU_6067 0.001894942 100.00000
## OTU_4664 0.001741211 100.00000
## OTU_4366 0.001663381 100.00000
## OTU_4560 0.001658878 100.00000
## OTU_333 0.001588123 100.00000
## OTU_6155 0.001512866 100.00000
## OTU_3731 0.001496142 100.00000
## OTU_7987 0.001436966 100.00000
## OTU_8367 0.001419598 100.00000
## OTU_5968 0.001397086 100.00000
## OTU_3803 0.001385507 100.00000
## OTU_5741 0.001373929 100.00000
## OTU_5500 0.001371357 100.00000
## OTU_5985 0.001362351 100.00000
## OTU_8458 0.001327617 100.00000
## OTU_5911 0.001307034 100.00000
## OTU_5445 0.001281948 100.00000
## OTU_5942 0.001259435 100.00000
## OTU_5491 0.001247857 100.00000
## OTU_5523 0.001213123 100.00000
## OTU_5414 0.001204118 100.00000
## OTU_5268 0.001193826 100.00000
## OTU_6193 0.001176459 100.00000
## OTU_8377 0.001168741 100.00000
## OTU_7455 0.001168097 100.00000
## OTU_8146 0.001164238 100.00000
## OTU_5355 0.001160379 100.00000
## OTU_934 0.001139795 100.00000
## OTU_6047 0.001099915 100.00000
## OTU_8436 0.001075473 100.00000
## OTU_5428 0.001070970 100.00000
## OTU_3789 0.001064538 100.00000
## OTU_5817 0.001042025 100.00000
## OTU_5815 0.001025301 100.00000
## OTU_2789 0.001014366 100.00000
## OTU_2980 0.001006648 100.00000
## OTU_790 0.001005361 100.00000
## OTU_3308 0.002082121 97.67442
## OTU_4036 0.001322471 97.67442
## OTU_3347 0.001297386 97.67442
## OTU_3162 0.002286023 95.34884
## OTU_5633 0.002238424 95.34884
## OTU_2877 0.002066683 95.34884
## OTU_5301 0.001794599 95.34884
## OTU_27934 0.001615139 95.34884
## OTU_4991 0.001551460 95.34884
## OTU_5624 0.001290953 95.34884
## OTU_5586 0.001207334 95.34884
## OTU_5493 0.001173886 95.34884
## OTU_5211 0.001081262 95.34884
## OTU_4083 0.001072257 95.34884
## OTU_1754 0.011149026 93.02326
## OTU_2893 0.008587059 93.02326
## OTU_761 0.008529169 93.02326
## OTU_2428 0.007645377 93.02326
## OTU_3736 0.007181611 93.02326
## OTU_1942 0.006453480 93.02326
## OTU_2466 0.006429681 93.02326
## OTU_5330 0.004964414 93.02326
## OTU_709 0.003061110 93.02326
## OTU_635 0.003025732 93.02326
## OTU_5727 0.002715698 93.02326
## OTU_1329 0.002241640 93.02326
## OTU_5432 0.002228776 93.02326
## OTU_5441 0.002136152 93.02326
## OTU_6164 0.002118784 93.02326
## OTU_3004 0.002071829 93.02326
## OTU_3023 0.001958621 93.02326
## OTU_8153 0.001957335 93.02326
## OTU_1738 0.001929676 93.02326
## OTU_5653 0.001828047 93.02326
## OTU_3240 0.001783021 93.02326
## OTU_3810 0.001757292 93.02326
## OTU_4926 0.001751503 93.02326
## OTU_5879 0.001743784 93.02326
## OTU_3680 0.001653732 93.02326
## OTU_5327 0.001590696 93.02326
## OTU_542 0.001498072 93.02326
## OTU_5691 0.001470413 93.02326
## OTU_5607 0.001467197 93.02326
## OTU_23 0.001443398 93.02326
## OTU_3198 0.001436966 93.02326
## OTU_5804 0.001425387 93.02326
## OTU_5651 0.001368140 93.02326
## OTU_5667 0.001355276 93.02326
## OTU_27938 0.001353989 93.02326
## OTU_5665 0.001334693 93.02326
## OTU_5742 0.001326974 93.02326
## OTU_5564 0.001316682 93.02326
## OTU_6102 0.001303175 93.02326
## OTU_5511 0.001293526 93.02326
## OTU_5449 0.001256219 93.02326
## OTU_5823 0.001253003 93.02326
## OTU_3214 0.001204761 93.02326
## OTU_5266 0.001177102 93.02326
## OTU_5976 0.001164881 93.02326
## OTU_5982 0.001111493 93.02326
## OTU_5821 0.001091553 93.02326
## OTU_5222 0.001090910 93.02326
## OTU_520 0.001058749 93.02326
## OTU_5499 0.001057463 93.02326
## OTU_223 0.001027231 93.02326
## OTU_5988 0.001011150 93.02326
## OTU_5800 0.001009864 93.02326
## OTU_3271 0.005586413 90.69767
## OTU_5398 0.005423033 90.69767
## OTU_2866 0.003430321 90.69767
## OTU_3311 0.002843056 90.69767
## OTU_5403 0.002407593 90.69767
## OTU_5752 0.001840911 90.69767
## OTU_5662 0.001409950 90.69767
## OTU_7407 0.001341125 90.69767
## OTU_3326 0.001315396 90.69767
## OTU_5288 0.001213123 90.69767
## OTU_5880 0.001172600 90.69767
## OTU_3207 0.001030447 90.69767
## OTU_3172 0.002888082 88.37209
## OTU_1676 0.002044814 88.37209
## OTU_6083 0.003165955 86.04651
## OTU_9255 0.001645371 86.04651
## OTU_5925 0.001328260 86.04651
## OTU_759 0.001018226 86.04651
## OTU_8011 0.003102276 83.72093
## OTU_5786 0.001188681 83.72093
## OTU_9267 0.001112780 83.72093
## OTU_3173 0.001013080 83.72093
## OTU_27901 0.003573117 81.39535
## OTU_3197 0.001815825 81.39535
dim(otu_tb16_ss36155_cosmop_sorted)
## [1] 138 2
Number and proportion (%) of cosmopolitan OTUs:
## [1] 138
## [1] 0.6982746
Number and proportion (%) of rare OTUs:
nrow(otu_tb16_ss36155_rabund_percoccur[otu_tb16_ss36155_rabund_percoccur$mean_rabund < 0.00001 & otu_tb16_ss36155_rabund_percoccur$mean_rabund >0,])
## [1] 13963
## [1] 70.65223